https://github.com/rjmaitri/04_Bartolini_Bob_2020.git
library(dplyr)
library(tidyr)
library(reactable)
library(lubridate)
library(readr)
# One Bootsrap Sample ####
#function to sample from a vector and replace
bootstrap <- function(vec){
one_boot<- sample(vec,
size = length(vec),
replace = TRUE)
return(one_boot)
}
#output with a random vector
bootstrap(c(1,4,5))
## [1] 4 1 4
#input vec, R(#of bootstraps), mean
#default to 1k= R
#function from part 1
bootstrap <- function(vec){
one_boot<- sample(vec,
size = length(vec),
replace = TRUE)
return(one_boot)
}
#input vec, R(#of bootstraps), mean
#default to 1k= R
boot_mean <- function(vec, R = 1000, fun = mean) {
out <- replicate(R, bootstrap(vec))
fun(out)
}
#input vector into mean function
boot_mean(c(3,4,5,3,2,50),10)
## [1] 10.51667
#compare to actual mean of vector
mean(c(3,4,5,3,2))
## [1] 3.4
#output the dataframe with sample size
data.frame(R = 1:10) %>%
rowwise(R) %>%
summarize(boot_mean(c(3,4,5,3,2,50),10, fun = mean))
| R | boot_mean(c(3, 4, 5, 3, 2, 50), 10, fun = mean) |
|---|---|
| 1 | 14.966667 |
| 2 | 12.533333 |
| 3 | 9.733333 |
| 4 | 7.833333 |
| 5 | 15.100000 |
| 6 | 10.300000 |
| 7 | 8.816667 |
| 8 | 16.000000 |
| 9 | 9.400000 |
| 10 | 13.483333 |
##write a function to sample from a vector
bootstrap <- function(vec){
one_boot<- sample(vec,
size = length(vec),
replace = TRUE)
return(one_boot)
}
#function that takes bootstrap samples and produces a mean
boot_mean <- function(vec, R = 1000, fun = mean) {
out <- replicate(R, bootstrap(vec))
fun(out)
}
#test to see that it works
boot_mean(c(3,4,5,3,2,50),10)
## [1] 11.16667
#compare to actual mean of vector
mean(c(3,4,5,3,2))
## [1] 3.4
#Write a function that takes the replicated bootstraps and produces statistics
stats_Bootsfunc <- function(vec, R = 1000, fun = mean){
vals <- replicate(R, bootstrap(vec))
#Statistics for data frame
bstraps_mean <- mean(vals)
mean_vec <- mean(vec)
firstquant <- quantile(vals,0.025)
thirdquant <- quantile(vals,0.975)
bias <- mean(vec) - mean(vals)
#arrange dataframe with statistics
out <- data.frame(mean_vec = mean_vec,
mean_samp = bstraps_mean,
firstquantile = firstquant,
thirdquantile = thirdquant,
bias = bias)
return(out)
}
#output the dataframe with the function
stats_Bootsfunc(c(4,5,2,2,3,2,4,16,8,9,9,19,8,32,32,32,32,41,4,8,4,5),1)
| mean_vec | mean_samp | firstquantile | thirdquantile | bias | |
|---|---|---|---|---|---|
| 2.5% | 12.77273 | 13.68182 | 2 | 36.275 | -0.9090909 |
reactable(data.frame(R = 1:100) %>%
rowwise(R) %>%
summarize(stats_Bootsfunc(c(3,4,5,5,5,6,5,10,4,5,4,4,16,8,9,9,19,18,20,21,22,4,8,4,5,5,6,5,6,4),R=100, fun = mean)))
library(readr)
pres_poll <- read_csv("president_polls.csv")
reactable(pres_poll, resizable = TRUE, wrap = FALSE, bordered = TRUE)
The presidential polls dataset is long, as it has two rows dedicated to each polling question.
class(pres_poll$start_date)
## [1] "character"
pres_poll$start_date<- as.Date(pres_poll$start_date, format = "%m/%d/%Y")
presPoll_filter <- filter(pres_poll, answer == "Biden" | answer == "Trump")
pres_current <- presPoll_filter[presPoll_filter$start_date >= "2020-09-29" & presPoll_filter$start_date <= "2020-10-10",]
reactable(pres_current, resizable = TRUE, wrap = FALSE, bordered = TRUE)
#filter by president and nationwide
Boot_pct <- pres_current %>%
select(state, answer, pct)
#replace NA's with zeros to keep dpylr happy
vec_3 <- replace(Boot_pct$state, is.na(Boot_pct$state), 0)
data <- data.frame(Nationwide = c(vec_3),
Candidate = c(Boot_pct$answer),
pct = Boot_pct$pct)
#Trump & National Pct filter for bootstrap
trumpbootdata <- data %>%
filter(Candidate == "Trump" & Nationwide == 0)
Donald_boot_mean <- sample(trumpbootdata$pct,
size = length(trumpbootdata$pct),
replace = TRUE) %>% mean
The bootstraped national average for Trump is 41.455
#Biden & National Pct filter for bootstrap
Bidenbootdata <- data %>%
filter(Candidate == "Biden" & Nationwide == 0)
Biden_boot_mean <- sample(Bidenbootdata$pct,
size = length(Bidenbootdata$pct),
replace = TRUE) %>% mean
The bootstraped national average for Biden is 50.7191071
library(tidyr)
poll_wide <- pres_current %>%
pivot_wider(names_from = state,
values_from = pct)
reactable(poll_wide, resizable = TRUE, wrap = FALSE, bordered = TRUE)
#Create new column with state, question_id and poll_id
Tidy_polls <- pres_current %>%
mutate(unique_ID = paste(state, question_id, poll_id, sep = "_"))
#create a df pivoted wider with columns for each state
Wide_poll <- Tidy_polls %>%
select(unique_ID, state, pct, answer) %>%
pivot_wider(names_from = state,
values_from = pct)
reactable(Wide_poll, resizable = TRUE, wrap = FALSE, bordered = TRUE)
#mean for Biden/Trump for each state
meanpct <-
aggregate(Wide_poll[, 3:25], list(Wide_poll$answer), mean, na.rm = TRUE)
reactable(meanpct, resizable = TRUE, wrap = FALSE, bordered = TRUE)
#function to display the mean difference for each state
poll_diff <- function(pct) {
result <- (pct - lag(pct))
}
Poll_Diff <- meanpct %>%
select_if(is.numeric) %>%
mutate_all(funs(difference = poll_diff(.)))%>%
head()
reactable(Poll_Diff[,24:46], resizable = TRUE, wrap = FALSE, bordered = TRUE)
The mean for Biden is 50.0705357 with a standard deviation of 3.4386823
#write a function to sample from Biden national polling avg
BidenUSAsims <- function(pct) {
Hopes <- replicate(1000, sample(Bidenbootdata$pct,
size = length(50),
replace = TRUE))
return(Hopes)
}
BidenSims <- BidenUSAsims()
BidenSims
## [1] 52.00 47.30 48.60 45.00 51.00 51.00 49.00 50.00 44.00 52.00 52.00 45.00
## [13] 52.00 48.60 52.00 52.00 53.00 52.00 51.00 52.00 51.00 52.00 51.00 54.88
## [25] 51.00 52.00 52.00 53.00 52.00 53.00 47.30 49.00 49.00 51.00 45.00 45.00
## [37] 43.00 52.00 49.00 49.00 54.88 52.00 45.00 45.00 52.00 51.00 53.00 44.00
## [49] 53.00 48.90 51.00 52.00 45.00 54.88 51.00 48.00 49.00 54.00 52.00 53.00
## [61] 52.00 53.00 52.00 44.00 52.00 46.00 48.00 57.00 46.00 48.00 51.00 46.00
## [73] 51.91 56.00 53.00 51.00 48.90 44.00 56.00 46.00 51.00 50.00 48.00 56.20
## [85] 52.00 49.00 51.91 50.00 48.00 45.00 51.00 49.00 45.00 53.00 52.00 48.00
## [97] 49.00 47.30 52.00 53.00 52.00 44.00 47.00 51.91 45.00 43.00 52.00 52.00
## [109] 51.00 51.00 51.91 49.00 52.00 45.00 54.00 56.20 53.00 49.00 53.00 54.00
## [121] 47.00 45.00 45.00 47.00 52.00 52.00 51.00 51.00 47.00 51.00 48.00 45.00
## [133] 44.00 43.00 45.00 45.00 54.88 56.20 51.00 51.00 49.00 51.00 44.00 48.00
## [145] 53.00 47.00 54.00 47.00 56.00 52.00 52.00 51.00 50.00 48.00 52.00 47.00
## [157] 52.00 51.00 57.00 49.00 45.00 49.00 47.00 54.00 45.00 52.00 51.00 45.00
## [169] 52.00 57.00 46.00 52.00 43.00 52.00 48.90 48.60 53.00 53.00 48.00 51.00
## [181] 43.00 56.20 51.00 48.90 45.00 53.00 50.00 45.00 52.00 52.00 51.00 51.00
## [193] 52.00 47.00 51.91 45.00 47.00 57.16 48.00 53.00 46.00 52.00 52.00 57.16
## [205] 51.00 53.00 43.00 46.00 54.88 54.00 56.20 43.00 52.00 45.00 57.00 52.00
## [217] 47.30 48.90 47.00 51.00 52.00 54.88 45.00 53.00 54.88 49.00 44.00 52.00
## [229] 52.00 52.00 51.00 53.00 49.00 51.00 45.00 53.00 48.90 48.60 52.00 52.00
## [241] 49.00 43.00 51.00 49.00 51.00 48.00 51.00 51.00 47.00 48.60 49.00 47.00
## [253] 45.00 57.16 47.00 57.16 53.00 51.91 56.20 52.00 51.00 47.30 52.00 53.00
## [265] 47.00 51.00 51.00 52.00 52.00 54.88 48.00 45.00 47.00 53.00 52.00 47.00
## [277] 44.00 54.88 53.00 48.60 48.90 52.00 51.00 45.00 51.00 54.00 52.00 43.00
## [289] 52.00 53.00 46.00 51.00 49.00 47.00 52.00 43.00 45.00 52.00 51.00 48.00
## [301] 48.90 52.00 53.00 51.00 48.00 47.00 50.00 52.00 51.00 49.00 48.00 52.00
## [313] 47.30 50.00 43.00 48.00 52.00 56.20 48.00 52.00 50.00 52.00 47.00 47.00
## [325] 51.00 56.20 53.00 45.00 50.00 51.00 43.00 48.90 53.00 52.00 52.00 56.20
## [337] 51.91 53.00 49.00 49.00 48.00 50.00 51.00 50.00 49.00 51.00 53.00 52.00
## [349] 51.91 52.00 48.90 52.00 43.00 52.00 51.00 47.00 48.00 54.00 48.60 47.00
## [361] 52.00 53.00 49.00 52.00 52.00 51.00 46.00 45.00 50.00 51.00 48.00 45.00
## [373] 54.88 50.00 47.00 52.00 49.00 52.00 52.00 45.00 57.16 45.00 51.00 51.00
## [385] 49.00 50.00 51.00 45.00 51.00 54.00 51.91 52.00 44.00 52.00 47.00 49.00
## [397] 48.00 51.91 53.00 52.00 51.00 48.00 52.00 51.00 47.00 51.00 51.00 56.20
## [409] 52.00 48.00 51.00 52.00 53.00 47.00 50.00 48.00 52.00 51.00 51.00 51.00
## [421] 57.16 49.00 49.00 49.00 57.00 51.00 53.00 51.00 57.00 51.00 51.00 44.00
## [433] 44.00 45.00 51.00 47.00 51.00 52.00 44.00 51.00 51.00 51.00 51.00 48.60
## [445] 53.00 52.00 52.00 51.00 51.00 48.00 52.00 45.00 45.00 51.00 48.60 53.00
## [457] 52.00 47.00 48.00 52.00 52.00 52.00 52.00 51.00 53.00 51.00 45.00 53.00
## [469] 52.00 43.00 45.00 57.00 53.00 49.00 43.00 44.00 51.00 44.00 57.00 52.00
## [481] 52.00 43.00 45.00 52.00 50.00 45.00 56.00 51.00 51.00 51.00 50.00 44.00
## [493] 51.00 53.00 51.00 51.00 51.00 54.00 49.00 52.00 51.00 48.00 49.00 54.88
## [505] 48.90 51.91 45.00 53.00 51.00 52.00 57.00 51.00 52.00 48.00 52.00 48.00
## [517] 53.00 52.00 53.00 48.00 52.00 47.30 43.00 52.00 51.00 49.00 50.00 51.00
## [529] 52.00 51.00 52.00 51.00 51.00 44.00 44.00 52.00 54.88 52.00 57.00 49.00
## [541] 45.00 47.00 51.91 51.00 51.00 54.00 52.00 54.00 52.00 53.00 51.00 52.00
## [553] 44.00 53.00 45.00 47.30 51.00 52.00 45.00 48.00 53.00 47.30 45.00 51.00
## [565] 54.00 53.00 48.00 57.16 53.00 51.00 49.00 52.00 54.88 43.00 52.00 56.00
## [577] 52.00 52.00 52.00 48.00 52.00 51.00 48.00 48.00 48.00 57.00 51.00 53.00
## [589] 52.00 51.00 52.00 47.30 45.00 48.00 48.00 57.16 57.00 52.00 53.00 50.00
## [601] 52.00 48.90 48.00 53.00 51.00 44.00 47.00 51.00 53.00 52.00 53.00 48.00
## [613] 52.00 47.00 51.00 51.00 56.20 50.00 51.00 54.00 51.00 54.00 45.00 47.00
## [625] 52.00 53.00 51.00 44.00 52.00 56.00 52.00 51.00 51.00 50.00 51.00 52.00
## [637] 47.30 52.00 52.00 52.00 44.00 51.00 53.00 49.00 52.00 52.00 45.00 43.00
## [649] 56.20 47.30 52.00 51.00 51.00 57.16 48.00 47.00 48.00 51.00 57.16 49.00
## [661] 48.90 51.00 49.00 52.00 51.00 57.16 45.00 57.16 44.00 53.00 52.00 48.00
## [673] 51.00 47.00 48.60 44.00 53.00 57.00 52.00 52.00 54.88 43.00 47.00 43.00
## [685] 43.00 48.00 49.00 56.20 44.00 52.00 52.00 52.00 47.00 48.00 51.00 51.00
## [697] 52.00 45.00 52.00 48.90 52.00 48.00 48.60 51.00 51.00 47.00 44.00 47.00
## [709] 50.00 52.00 51.00 51.00 52.00 56.20 48.00 48.60 53.00 52.00 52.00 51.00
## [721] 49.00 48.90 54.88 48.00 47.00 53.00 50.00 52.00 51.00 53.00 52.00 45.00
## [733] 52.00 45.00 51.00 43.00 45.00 45.00 53.00 52.00 51.00 52.00 52.00 43.00
## [745] 51.00 48.00 51.91 48.00 44.00 51.00 44.00 53.00 53.00 48.60 52.00 46.00
## [757] 57.00 52.00 56.20 53.00 49.00 48.00 51.00 48.00 52.00 48.00 52.00 52.00
## [769] 49.00 53.00 54.00 45.00 47.00 49.00 50.00 51.00 53.00 53.00 51.00 52.00
## [781] 48.00 47.30 51.00 51.00 51.00 47.00 48.60 51.00 52.00 51.00 51.00 51.00
## [793] 48.90 45.00 51.00 51.00 53.00 52.00 48.90 48.60 47.00 44.00 56.00 48.00
## [805] 45.00 44.00 51.00 52.00 52.00 52.00 47.00 53.00 52.00 48.90 51.00 45.00
## [817] 52.00 52.00 46.00 48.00 51.00 52.00 45.00 44.00 53.00 52.00 47.00 49.00
## [829] 49.00 56.00 51.00 48.90 52.00 51.00 48.60 51.00 51.91 53.00 54.00 54.88
## [841] 45.00 45.00 53.00 52.00 51.00 56.00 57.00 54.00 44.00 47.00 48.00 52.00
## [853] 56.20 48.00 48.00 57.16 52.00 48.00 56.00 57.00 52.00 52.00 43.00 54.88
## [865] 53.00 57.00 52.00 52.00 57.00 51.00 48.00 43.00 51.00 52.00 47.00 49.00
## [877] 54.00 48.00 51.00 48.00 51.00 51.00 47.00 52.00 46.00 52.00 49.00 50.00
## [889] 44.00 51.00 51.00 44.00 57.00 51.00 51.00 50.00 53.00 46.00 57.00 51.00
## [901] 52.00 53.00 50.00 46.00 46.00 52.00 52.00 51.00 52.00 44.00 52.00 51.00
## [913] 50.00 52.00 51.00 49.00 44.00 49.00 51.00 52.00 53.00 43.00 53.00 45.00
## [925] 52.00 49.00 50.00 48.00 53.00 48.00 47.30 43.00 51.00 51.00 54.00 53.00
## [937] 47.00 52.00 51.00 51.00 47.30 54.00 45.00 47.30 45.00 44.00 51.00 45.00
## [949] 53.00 54.88 53.00 52.00 57.00 51.00 51.00 45.00 53.00 52.00 52.00 52.00
## [961] 53.00 54.00 49.00 51.00 49.00 56.00 49.00 52.00 57.16 52.00 49.00 52.00
## [973] 43.00 49.00 57.16 47.00 53.00 51.00 52.00 43.00 46.00 47.00 52.00 53.00
## [985] 44.00 51.00 56.20 50.00 51.91 57.00 51.00 52.00 52.00 54.00 47.00 48.90
## [997] 48.00 53.00 51.00 47.00
#errorneous attempt ####
#convert to data frame and pivot
longBiden <- as.data.frame(BidenSims) %>%
pivot_longer(cols = everything(),
names_to = "sim_ID",
values_to= "pct")
reactable(longBiden, resizable = TRUE, wrap = FALSE, bordered = TRUE)